library(data.table)
library(dplyr)
library(DT)
library(ggplot2)
library(gtools)
library(vegan)
library(iNEXT)
library(fossil)
library(ggrepel)

1) 18S amplicons

1.1) Data overview

Let’s read the dataset and remove the samples containing less than 1154 reads:

setwd("~/Documents/TFM2/TFM2_Taraspina_miTags_analysis/")

#read data 
tb18_tax <- read.table(file="~/Documents/TFM2/TFM2_Taraspina_miTags_analysis/input/otu_table99_SUR_miTags_18S_classif.txt", head=TRUE, fill=TRUE)

#table dimensions and format before setting column names
dim(tb18_tax) # 8642   58
tb18_tax[1:5,1:5]

row.names(tb18_tax)<-tb18_tax[,1]
tb18_tax<-tb18_tax[,-1]
tb18_tax[is.na(tb18_tax)]<-"NA"

dim(tb18_tax) #8642   57
tb18_tax[1:5,1:5]

#table with taxonomic classification alone:
tb18_class <- tb18_tax[,56:57]
dim(tb18_class) #8642    2
tb18_class[1:5,1:2]

#table with occurence data alone
tb18_tax_occur <- tb18_tax[,1:55]
dim(tb18_tax_occur) #8642   55
tb18_tax_occur[1:5,1:5]

amplicons_per_sample_tb18<-colSums(tb18_tax_occur)
summary(amplicons_per_sample_tb18) #median of amplicons per sample = ~3086)
#we'll remove all samples with less than 1154 reads.
amplicons_per_sample_tb18[which(colSums(tb18_tax_occur)<1154)]

#remove samples with less than 1154 reads
tb18_tax_occur_min1154 <- tb18_tax_occur[,colSums(tb18_tax_occur) >= 1154]
dim(tb18_tax_occur_min1154) #8642   45

#remove samples with omitted in 16S dataset (so that we can compare the relative abundance of 16S and 18S OTUs considering the same samples)
tb18_tax_occur_min1154<-subset(tb18_tax_occur_min1154, select=-c(TARA_70_SUR_0d2_3,TARA_67_SUR_0d2_3))


dim(tb18_tax_occur_min1154) #8642   43

#let's check if we loose any OTU when excluding the mentioned stations.
tb18_tax_occur_min1154_no_cero<-tb18_tax_occur_min1154[,-(which(colSums(tb18_tax_occur_min1154)==0))]
dim(tb18_tax_occur_min1154_no_cero)

Table dimensions and content outline:

## [1] 8642   43
##       TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1                  0                  0                  0
## OTU_2                  0                  0                  0
## OTU_3                  0                  0                  0
## OTU_4                  0                  0                  0
## OTU_5                  0                  0                  0
##       TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1                  0                  0
## OTU_2                  0                  0
## OTU_3                  1                  0
## OTU_4                  0                  0
## OTU_5                  0                  0

Minimum number of reads per station:

min(colSums(tb18_tax_occur_min1154)) 
## [1] 1154

Maximum number of reads per station:

max(colSums(tb18_tax_occur_min1154)) 
## [1] 23781

Identification of stations with higher number of reads:

amplicons_per_sample<-colSums(tb18_tax_occur_min1154)
amplicons_per_sample[which(colSums(tb18_tax_occur_min1154)>20000)]
## TARA_84_SUR_0d2_3 
##             23781

Overall reads per sample:

1.2) Normalization

Let’s normalize the original dataset by randomly subsampling 1154 reads in each station:

tb18_tax_occur_min1154_t<-t(tb18_tax_occur_min1154)
tb18_tax_occur_ss1154<-rrarefy(tb18_tax_occur_min1154_t, 1154)

The normalized table shows the following dimensions and format:

## [1]   43 8642
##                    OTU_1 OTU_2 OTU_3 OTU_4 OTU_5
## TARA_102_SUR_0d2_3     0     0     0     0     0
## TARA_109_SUR_0d2_3     0     0     0     0     0
## TARA_110_SUR_0d2_3     0     0     0     0     0
## TARA_111_SUR_0d2_3     0     0     1     0     0
## TARA_112_SUR_0d2_3     0     0     0     0     0

Its content fits with the expected normalization values (1154 reads per station):

rowSums(tb18_tax_occur_ss1154)
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3 
##               1154               1154               1154 
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3 TARA_122_SUR_0d2_3 
##               1154               1154               1154 
## TARA_123_SUR_0d2_3 TARA_124_SUR_0d2_3 TARA_125_SUR_0d2_3 
##               1154               1154               1154 
## TARA_128_SUR_0d2_3 TARA_132_SUR_0d2_3 TARA_133_SUR_0d2_3 
##               1154               1154               1154 
## TARA_137_SUR_0d2_3 TARA_138_SUR_0d2_3 TARA_140_SUR_0d2_3 
##               1154               1154               1154 
## TARA_142_SUR_0d2_3 TARA_145_SUR_0d2_3 TARA_146_SUR_0d2_3 
##               1154               1154               1154 
## TARA_148_SUR_0d2_3 TARA_149_SUR_0d2_3 TARA_150_SUR_0d2_3 
##               1154               1154               1154 
## TARA_151_SUR_0d2_3 TARA_152_SUR_0d2_3  TARA_56_SUR_0d2_3 
##               1154               1154               1154 
##  TARA_57_SUR_0d2_3  TARA_62_SUR_0d2_3  TARA_64_SUR_0d2_3 
##               1154               1154               1154 
##  TARA_65_SUR_0d2_3  TARA_66_SUR_0d2_3  TARA_68_SUR_0d2_3 
##               1154               1154               1154 
##  TARA_72_SUR_0d2_3  TARA_76_SUR_0d2_3  TARA_78_SUR_0d2_3 
##               1154               1154               1154 
##  TARA_82_SUR_0d2_3  TARA_84_SUR_0d2_3  TARA_85_SUR_0d2_3 
##               1154               1154               1154 
##  TARA_96_SUR_0d2_3  TARA_98_SUR_0d2_3  TARA_99_SUR_0d2_3 
##               1154               1154               1154 
##             MP0311             MP1517             MP1672 
##               1154               1154               1154 
##             MP2821 
##               1154

Let’s check out how many OTUs don’t appear in the new table:

length(which(colSums(tb18_tax_occur_ss1154)==0))
## [1] 2942

There are 2942 OTUs that don’t show any occurrence in the normalized data. Let’s remove them from the table and take a look at its final dimensions:

tb18_tax_occur_ss1154_no_cero<-tb18_tax_occur_ss1154[,-(which(colSums(tb18_tax_occur_ss1154)==0))]
tb18_tax_occur_ss1154_no_cero<-tb18_tax_occur_ss1154_no_cero[mixedorder(row.names(tb18_tax_occur_ss1154_no_cero)),]
dim(tb18_tax_occur_ss1154_no_cero)
## [1]   43 5700

Datasets summary:

dim(tb18_tax) 
## [1] 8642   57
dim(tb18_tax_occur) 
## [1] 8642   55
dim(tb18_tax_occur_ss1154_no_cero) 
## [1]   43 5700

1.3) General community analysis

1.3.1) Richness and evenness (Shannon index)

Most of the samples take Shannon Index values around 6:

1.3.2) Richness: OTU number

Lowest number of OTUs per sample:

## [1] 348

Maximum number of OTUs per sample:

## [1] 791

In most of the samples, we can identify between 600 and 700 OTUs:

plot(sort(OTUs_per_sample_18S_tax_occur_ss1154), pch=19)

boxplot(OTUs_per_sample_18S_tax_occur_ss1154, pch=19)

1.3.3) Index of evenness

1.3.3.1) Pielou’s index

pielou_evenness_18S_tax_occur_ss1154<-tb18_tax_occur_ss1154_div/log(OTUs_per_sample_18S_tax_occur_ss1154)

The Pielou index (constrained between 0 and 1) takes values closer to 1 as the variation of species proportion in a sample increases. Most of the samples get values around 0.95, meaning that the numerical composition of different OTUs in a sample is highly variable - there’s no constant presence of dominant species.

The less variation in communities between the species (and the presence of a dominant specie), the lower J’ is.

plot(sort(pielou_evenness_18S_tax_occur_ss1154), pch=19)

boxplot(pielou_evenness_18S_tax_occur_ss1154, pch=19)

The OTU_38, with 890 reads, is the most abundant in the overall dataset:

head(sort(colSums(tb18_tax_occur_ss1154_no_cero), decreasing=T), n=10L)
##   OTU_38 OTU_3677 OTU_5415  OTU_751 OTU_3374 OTU_2494 OTU_2631 OTU_3569 
##      874      695      571      524      429      361      310      282 
## OTU_3177 OTU_1032 
##      270      267

Most of the OTUs show very few occurrences; suggesting that we will probably be able to identify a significant ammount of rare otus:

plot(log(sort(colSums(tb18_tax_occur_ss1154_no_cero), decreasing=T)), pch=19)

1.3.4) Abundance Models

1.3.4.1) Rank-Abundance or Dominance/Diversity Model (“radfit”)

The OTUs abundance distribution fits relativelly close to log-normal model.

1.3.4.2) Preston’s Lognormal Model

According to Preston’s lognormal model fit into species frequencies groups, we’re missing 1588.816 species:

tb18_tax_occur_ss1154_prestonfit<-prestonfit(colSums(tb18_tax_occur_min1154_t))
plot(tb18_tax_occur_ss1154_prestonfit, main="Pooled species")

veiledspec(tb18_tax_occur_ss1154_prestonfit)
## Extrapolated     Observed       Veiled 
##     9937.816     8349.000     1588.816

When computing Prestons’ lognormal model fit without pooling data into groups, we seem to miss 1385.371 species:

## Extrapolated     Observed       Veiled 
##     9734.371     8349.000     1385.371

1.3.5) Rarefaction curves of rarefied and non-rarefied datasets

rarec_input<-t(as.matrix(colSums(tb18_tax_occur_ss1154_no_cero)))

#tb18_rarecurve_step1000_10000<-rarecurve(rarec_input, step = 1000, 10000, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="18S amplicons diversity step=1000 & ss=10000\n(40,816 OTUs; 5,247,375 reads)\n")
#tb18_rarecurve_step1000_10000

tb18_rarecurve_step100_5700<-rarecurve(rarec_input, step = 100, 5700, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="18S amplicons diversity step=100 & ss=5700\n(5,719 OTUs; 49,622 reads)\n")

#without rarefying
rarec_allOTUs_input<-t(as.matrix(colSums(t(tb18_tax_occur))))
tb18_rarecurve_allOTUs_step100_8642<-rarecurve(rarec_allOTUs_input, step = 100, 8642, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="18S amplicons diversity non-rarefied step=1000 & ss=100000\n(8,642 OTUs; 244,184 reads)\n")

1.3.6) Beta diversity

1.3.6.1) Dissimilarity matrix using Bray-Curtis index:

The Bray-Curtis dissimilarity, constrained between 0 (minimum distance) and 1 (highest dissimilarity) allows us to quantify the differences between samples according to the composition and relative abundance of their OTUs. In our dataset, most of the samples pairs take dissimilarity values around 0.8, meaning that their composition is substantially different.

1.3.6.2) Hierarchical clustering

The stations seem to form clusters according to geographic localization, but there are no evident clusters separated from the general groups, apart from the one containing TARA_84, TARA_82 and TARA_85.

(To be done: assign Longhurst provinces information to each station and check if any of the central clusters is meaningful regarding to the samples’ geographical location)

1.3.6.3) Non-metric multidimensional scaling

We can identify a prominent group in the central part of the NMDS plot and a few outliers (TARA 82, 84 and 85) in the central-right edge of the plot. The stress parameter takes a value below 0.2, suggesting that the plot is acceptable.

## 
## Call:
## monoMDS(dist = tb18_tax_occur_ss1154_no_cero.bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 43 points, dissimilarity 'bray', call 'vegdist(x = tb18_tax_occur_ss1154_no_cero, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.1463937 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 70 iterations: Stress nearly unchanged (ratio > sratmax)

When implementing a most robut function for computing NMDS plots, the result is quiet the same:

## Run 0 stress 0.1465317 
## Run 1 stress 0.1710814 
## Run 2 stress 0.1529505 
## Run 3 stress 0.1529514 
## Run 4 stress 0.1463796 
## ... New best solution
## ... Procrustes: rmse 0.0160749  max resid 0.09410719 
## Run 5 stress 0.1450501 
## ... New best solution
## ... Procrustes: rmse 0.03346756  max resid 0.2057251 
## Run 6 stress 0.1544138 
## Run 7 stress 0.1529513 
## Run 8 stress 0.1464534 
## Run 9 stress 0.1645093 
## Run 10 stress 0.1464532 
## Run 11 stress 0.1529505 
## Run 12 stress 0.145052 
## ... Procrustes: rmse 0.0005393131  max resid 0.002027162 
## ... Similar to previous best
## Run 13 stress 0.1450487 
## ... New best solution
## ... Procrustes: rmse 0.002278156  max resid 0.01146875 
## Run 14 stress 0.154718 
## Run 15 stress 0.154417 
## Run 16 stress 0.1450194 
## ... New best solution
## ... Procrustes: rmse 0.003631269  max resid 0.01466868 
## Run 17 stress 0.1546418 
## Run 18 stress 0.1522817 
## Run 19 stress 0.1522815 
## Run 20 stress 0.1544133 
## *** No convergence -- monoMDS stopping criteria:
##     20: stress ratio > sratmax
## Warning in ordiplot(x, choices = choices, type = type, display = display, :
## Species scores not available

1.4) Geographical analysis

## Warning in if (class(lats) == "SpatialPoints") lats <- coordinates(lats):
## the condition has length > 1 and only the first element will be used

Working datasets:

  1. Community matrix: tb18_tax_occur_ss1154_no_cero
dim(tb18_tax_occur_ss1154_no_cero)
## [1]   43 5728
tb18_tax_occur_ss1154_no_cero[1:5, 1:5]
##                    OTU_1 OTU_2 OTU_3 OTU_5 OTU_6
## MP0311                 0     0     0     0     0
## MP1517                 0     0     0     0     1
## MP1672                 0     0     0     0     0
## MP2821                 0     3     0     0     0
## TARA_102_SUR_0d2_3     0     0     0     0     0
  1. Community Bray-Curtis: tb18_tax_occur_ss1154_no_cero.bray
tb18_tax_occur_ss1154_no_cero.bray<-as.matrix(tb18_tax_occur_ss1154_no_cero.bray)
## [1] 43 43
  1. Stations distances in km: geo_distances_MP_18S
dim(geo_distances_MP_18S)
## [1] 43 43

Communities quickly change their composition across geographical distances:

plot(geo_distances_MP_18S, tb18_tax_occur_ss1154_no_cero.bray, pch=19, cex=0.4, xlab="Geopgraphical distances", ylab="Bray-Curtis dissimilarities")

1.4.1) Mantel correlograms

Mantel statistic is -significantlly- so low, meaning that the correlation between samples dissimilarity and geographical distances is weak.

mantel(geo_distances_MP_18S, tb18_tax_occur_ss1154_no_cero.bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = geo_distances_MP_18S, ydis = tb18_tax_occur_ss1154_no_cero.bray) 
## 
## Mantel statistic r: 0.1186 
##       Significance: 0.018 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0646 0.0894 0.1075 0.1334 
## Permutation: free
## Number of permutations: 999

Maximum distance between samples:

## [1] 19543.94

Minimum distance between samples:

## [1] 0

Correlograms:

MP_18s_ss1154_mantel_correl_by_1000km<-mantel.correlog(tb18_tax_occur_ss1154_no_cero.bray, D.geo=geo_distances_MP_18S, break.pts=seq(0,20000, by=1000))
plot(MP_18s_ss1154_mantel_correl_by_1000km)

MP_18s_ss1154_mantel_correl_by_100km<-mantel.correlog(tb18_tax_occur_ss1154_no_cero.bray, D.geo=geo_distances_MP_18S, break.pts=seq(0,20000, by=100))
plot(MP_18s_ss1154_mantel_correl_by_100km)

1.5) Abundance vs. occurence

In the following plot, we can appreciate the OTUs distribution according to their percentage of occurence and relative abundance. The red line keeps up OTUs that occur in more than 80% of the samples, the green line limits regionally rare OTUs (< 0.001%), and the blue one restricts regionally abundant OTUs (> 0.1%).

Regionally abundant OTUs (relative abundance over 0.1%):

##          mean_rabund perc_occur
## OTU_38   0.017492241  88.372093
## OTU_3677 0.013884970  81.395349
## OTU_751  0.010983032  37.209302
## OTU_5415 0.010761356  25.581395
## OTU_3374 0.008826730  93.023256
## OTU_2494 0.006932409  69.767442
## OTU_2631 0.006106163  74.418605
## OTU_3177 0.005682963  48.837209
## OTU_1032 0.005300069  72.093023
## OTU_3569 0.005159002  53.488372
## OTU_933  0.004796260  90.697674
## OTU_3282 0.004211842  37.209302
## OTU_2910 0.004070775  72.093023
## OTU_5120 0.004070775  39.534884
## OTU_3729 0.003909556  79.069767
## OTU_2200 0.003728185  86.046512
## OTU_1921 0.003687880  76.744186
## OTU_1383 0.003627423  88.372093
## OTU_3454 0.003607271  72.093023
## OTU_3227 0.003466205  34.883721
## OTU_1597 0.003385595  69.767442
## OTU_4176 0.003385595  34.883721
## OTU_4144 0.003244529  44.186047
## OTU_4227 0.003204224  32.558140
## OTU_1456 0.002901939  72.093023
## OTU_1833 0.002720567  72.093023
## OTU_974  0.002680263  67.441860
## OTU_4125 0.002599653  79.069767
## OTU_3038 0.002539196  76.744186
## OTU_4123 0.002519044  34.883721
## OTU_4103 0.002418282  37.209302
## OTU_2031 0.002377978  67.441860
## OTU_4518 0.002377978  44.186047
## OTU_3271 0.002357825  32.558140
## OTU_950  0.002317520   4.651163
## OTU_896  0.002297368  74.418605
## OTU_380  0.002156302  83.720930
## OTU_3871 0.002156302  69.767442
## OTU_2654 0.002115997  65.116279
## OTU_2996 0.002055540  69.767442
## OTU_949  0.002055540  65.116279
## OTU_3926 0.002015235  69.767442
## OTU_893  0.002015235  55.813953
## OTU_3560 0.001974930  62.790698
## OTU_2553 0.001974930  83.720930
## OTU_3056 0.001934626  72.093023
## OTU_973  0.001934626  67.441860
## OTU_1018 0.001874169  13.953488
## OTU_758  0.001874169  65.116279
## OTU_717  0.001833864  76.744186
## OTU_1047 0.001813712  67.441860
## OTU_1746 0.001793559  53.488372
## OTU_4260 0.001793559  41.860465
## OTU_256  0.001773407  76.744186
## OTU_2269 0.001753255  86.046512
## OTU_2195 0.001733102  72.093023
## OTU_2965 0.001733102  67.441860
## OTU_4134 0.001733102  48.837209
## OTU_2648 0.001712950  83.720930
## OTU_1343 0.001712950  69.767442
## OTU_2868 0.001712950  48.837209
## OTU_4226 0.001672645  37.209302
## OTU_2686 0.001632340  72.093023
## OTU_2043 0.001632340  60.465116
## OTU_1641 0.001612188  74.418605
## OTU_2305 0.001612188  74.418605
## OTU_3018 0.001612188  72.093023
## OTU_1496 0.001551731  69.767442
## OTU_2518 0.001551731  53.488372
## OTU_4533 0.001551731  39.534884
## OTU_4090 0.001551731  30.232558
## OTU_216  0.001531579  67.441860
## OTU_1997 0.001471122  74.418605
## OTU_2207 0.001450969  72.093023
## OTU_1635 0.001450969  69.767442
## OTU_3124 0.001450969  58.139535
## OTU_437  0.001450969  76.744186
## OTU_793  0.001430817  60.465116
## OTU_4089 0.001430817  37.209302
## OTU_3602 0.001410665  65.116279
## OTU_884  0.001390512  76.744186
## OTU_2120 0.001390512  58.139535
## OTU_3430 0.001390512  34.883721
## OTU_2853 0.001350208  69.767442
## OTU_2887 0.001350208  69.767442
## OTU_1551 0.001350208  65.116279
## OTU_4203 0.001350208  53.488372
## OTU_1066 0.001330055  58.139535
## OTU_2189 0.001330055  58.139535
## OTU_1929 0.001330055  48.837209
## OTU_1395 0.001330055  25.581395
## OTU_2594 0.001309903  69.767442
## OTU_1087 0.001289751  55.813953
## OTU_1446 0.001289751  69.767442
## OTU_1694 0.001289751  55.813953
## OTU_2730 0.001289751  53.488372
## OTU_983  0.001269598  55.813953
## OTU_2986 0.001249446  60.465116
## OTU_4190 0.001249446  30.232558
## OTU_2124 0.001229293  72.093023
## OTU_1628 0.001229293  58.139535
## OTU_628  0.001229293  58.139535
## OTU_922  0.001229293  58.139535
## OTU_6181 0.001229293  23.255814
## OTU_2078 0.001209141  67.441860
## OTU_938  0.001188989  67.441860
## OTU_990  0.001188989  67.441860
## OTU_1001 0.001188989  60.465116
## OTU_5484 0.001188989  34.883721
## OTU_2158 0.001168836  67.441860
## OTU_3196 0.001168836  58.139535
## OTU_2873 0.001148684  72.093023
## OTU_825  0.001148684  44.186047
## OTU_1261 0.001128532  58.139535
## OTU_2456 0.001128532  58.139535
## OTU_2966 0.001128532  58.139535
## OTU_914  0.001128532  58.139535
## OTU_458  0.001108379  60.465116
## OTU_1825 0.001108379  55.813953
## OTU_2089 0.001108379  46.511628
## OTU_5750 0.001108379  25.581395
## OTU_6608 0.001108379  18.604651
## OTU_2086 0.001068075  65.116279
## OTU_1911 0.001068075  48.837209
## OTU_599  0.001068075  48.837209
## OTU_1901 0.001047922  48.837209
## OTU_2219 0.001027770  67.441860
## OTU_1003 0.001027770  62.790698
## OTU_1975 0.001027770  58.139535
## OTU_295  0.001027770  58.139535
## OTU_2906 0.001027770  48.837209
## OTU_4110 0.001027770  41.860465
## OTU_373  0.001007618  51.162791
## OTU_1276 0.001007618  48.837209
## OTU_2466 0.001007618  46.511628
## OTU_3813 0.001007618  39.534884
## OTU_1012 0.001007618  60.465116
## OTU_363  0.001007618  44.186047
dim(tb18_ss1154_abundant_sorted)
## [1] 138   2

Proportion of regionally abundant OTUs (%):

## [1] 2.409218

Cosmopolitan OTUs (relative abundance over 0.1% and occurence in more than 80% of samples):

##          mean_rabund perc_occur
## OTU_3374 0.008826730   93.02326
## OTU_933  0.004796260   90.69767
## OTU_38   0.017492241   88.37209
## OTU_1383 0.003627423   88.37209
## OTU_2200 0.003728185   86.04651
## OTU_2269 0.001753255   86.04651
## OTU_380  0.002156302   83.72093
## OTU_2553 0.001974930   83.72093
## OTU_2648 0.001712950   83.72093
## OTU_3677 0.013884970   81.39535
dim(otu_tb18_ss1154_cosmop_sorted)
## [1] 10  2

Number and proportion (%) of cosmopolitan OTUs:

## [1] 10
## [1] 0.174581

Number and proportion (%) of rare OTUs:

nrow(otu_tb18_ss1154_rabund_percoccur[otu_tb18_ss1154_rabund_percoccur$mean_rabund < 0.00001 & otu_tb18_ss1154_rabund_percoccur$mean_rabund >0,])
## [1] 0
## [1] 0

1.6) Taxonomic composition analysis

1.6.1) Normalized data

1.6.1.1) Absolute values

Let’s add the taxonomic classification by merging “tb18_tax_occur_ss1154_no_cero” with “tb18_tax”:

## [1] 5728   46
##   Row.names MP0311 MP1517 MP1672 MP2821
## 1     OTU_1      0      0      0      0
## 2    OTU_10      0      1      3      0
## 3   OTU_100      0      0      0      0
## 4  OTU_1000      2      1      0      0
## 5  OTU_1001      0      2      0      5
## [1] 5728   45
##          MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_1         0      0      0      0                  0
## OTU_10        0      1      3      0                  0
## OTU_100       0      0      0      0                  0
## OTU_1000      2      1      0      0                  0
## OTU_1001      0      2      0      5                  0
## [1] 5728   46
##       MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_1      0      0      0      0                  0
## OTU_2      0      0      0      3                  0
## OTU_3      0      0      0      0                  0
## OTU_5      0      0      0      0                  0
## OTU_6      0      1      0      0                  0
## [1] 1905   46
##        MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_14      0      0      0      0                  0
## OTU_19      0      0      0      0                  0
## OTU_29      0      0      0      0                  0
## OTU_72      0      0      0      0                  0
## OTU_74      0      0      0      0                  0
##        MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_14      0      0      0      0                  0
## OTU_19      0      0      0      0                  0
## OTU_29      0      0      0      0                  0
## OTU_72      0      0      0      0                  0
## OTU_74      0      0      0      0                  0
## [1] 1044   43
## [1] 43
## [1] 32
## [1] 41
## [1] 43
## [1] 43
## [1] 27
## [1] 39
## [1] 34
## [1] 35
## [1] 0
## [1] 43
## [1] 39
## [1] 35
## [1] 18
## [1] 2
## [1] 0
## [1] 14
## [1] 2
## [1] 0
## [1] 5
## [1] 0
## [1] 0
## [1] 12
## [1] 25
## [1] 4
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 36
## [1] 16
## [1] 21
##                     Group.1    x
## 8               Dinophyceae 5453
## 17         Prymnesiophyceae 5180
## 10          Mamiellophyceae 3173
## 13            Pelagophyceae 2206
## 16 Prasinophyceae_clade-VII 1367
## 7          Dictyochophyceae  622
## 5             Chrysophyceae  599
## 6             Cryptophyceae  380
## 1         Bacillariophyceae  287
## 3      Chlorarachniophyceae  136
## 2             Bolidophyceae  132
## 18         Pyramimonadaceae  111
## 23     other_Prasinophyceae   93
## 9         Eustigmatophyceae   88
## 15  Prasinophyceae_clade-IX   53
## 12           Pavlovophyceae   51
## 20         Trebouxiophyceae   39
## 4             Chlorophyceae   25
## 11     Nephroselmidophyceae   19
## 22            Xanthophyceae    5
## 19             Rhodophyceae    4
## 21              Ulvophyceae    4
## 14             Phaeophyceae    2
##                     Group.1    x
## 8               Dinophyceae 1044
## 17         Prymnesiophyceae  165
## 5             Chrysophyceae  134
## 1         Bacillariophyceae  125
## 10          Mamiellophyceae  102
## 6             Cryptophyceae   57
## 7          Dictyochophyceae   48
## 16 Prasinophyceae_clade-VII   30
## 20         Trebouxiophyceae   30
## 13            Pelagophyceae   23
## 2             Bolidophyceae   21
## 4             Chlorophyceae   20
## 9         Eustigmatophyceae   19
## 23     other_Prasinophyceae   19
## 3      Chlorarachniophyceae   16
## 18         Pyramimonadaceae   14
## 15  Prasinophyceae_clade-IX   13
## 11     Nephroselmidophyceae    7
## 12           Pavlovophyceae    5
## 19             Rhodophyceae    4
## 22            Xanthophyceae    4
## 21              Ulvophyceae    3
## 14             Phaeophyceae    2
##                      reads_per_class OTUs_per_class
## Bacillariophyceae                287            125
## Bolidophyceae                    132             21
## Chlorarachniophyceae             136             16
## Chlorophyceae                     25             20
## Chrysophyceae                    599            134
##                          reads_per_class OTUs_per_class samples_per_class
## Dinophyceae                         5453           1044                43
## Prymnesiophyceae                    5180            165                43
## Mamiellophyceae                     3173            102                40
## Pelagophyceae                       2206             23                42
## Prasinophyceae_clade-VII            1367             30                39
## Dictyochophyceae                     622             48                43
## Chrysophyceae                        599            134                42
## Cryptophyceae                        380             57                32
## Bacillariophyceae                    287            125                38
## Chlorarachniophyceae                 136             16                35
## Bolidophyceae                        132             21                28
## Pyramimonadaceae                     111             14                36
## other_Prasinophyceae                  93             19                30
## Eustigmatophyceae                     88             19                22
## Prasinophyceae_clade-IX               53             13                16
## Pavlovophyceae                        51              5                26
## Trebouxiophyceae                      39             30                17
## Chlorophyceae                         25             20                13
## Nephroselmidophyceae                  19              7                11
## Xanthophyceae                          5              4                 1
## Rhodophyceae                           4              4                 3
## Ulvophyceae                            4              3                 1
## Phaeophyceae                           2              2                 1

1.6.1.1) Relative values

##   reads_per_class    OTUs_per_class samples_per_class 
##               100               100              1400
##                          reads_per_class OTUs_per_class samples_per_class
## Dinophyceae                 27.225522992     54.8031496        100.000000
## Prymnesiophyceae            25.862499376      8.6614173        100.000000
## Mamiellophyceae             15.842029058      5.3543307         93.023256
## Pelagophyceae               11.014029657      1.2073491         97.674419
## Prasinophyceae_clade-VII     6.825103600      1.5748031         90.697674
## Dictyochophyceae             3.105497029      2.5196850        100.000000
## Chrysophyceae                2.990663538      7.0341207         97.674419
## Cryptophyceae                1.897248989      2.9921260         74.418605
## Bacillariophyceae            1.432922263      6.5616798         88.372093
## Chlorarachniophyceae         0.679015428      0.8398950         81.395349
## Bolidophyceae                0.659044386      1.1023622         65.116279
## Pyramimonadaceae             0.554196415      0.7349081         83.720930
## other_Prasinophyceae         0.464326726      0.9973753         69.767442
## Eustigmatophyceae            0.439362924      0.9973753         51.162791
## Prasinophyceae_clade-IX      0.264616306      0.6824147         37.209302
## Pavlovophyceae               0.254630785      0.2624672         60.465116
## Trebouxiophyceae             0.194717659      1.5748031         39.534884
## Chlorophyceae                0.124819012      1.0498688         30.232558
## Nephroselmidophyceae         0.094862449      0.3674541         25.581395
## Xanthophyceae                0.024963802      0.2099738          2.325581
## Rhodophyceae                 0.019971042      0.2099738          6.976744
## Ulvophyceae                  0.019971042      0.1574803          2.325581
## Phaeophyceae                 0.009985521      0.1049869          2.325581

Reads per class vs. OTUs per class [PLOT DESCRIPTION]

## Warning in trans$transform(limits): NaNs produced

## Warning in trans$transform(limits): NaNs produced

## Warning in trans$transform(limits): NaNs produced

1.6.2) Non-normalized data

## [1] 8642   46
##   Row.names TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## 1     OTU_1                  0                  0                  0
## 2    OTU_10                  0                  3                  2
## 3   OTU_100                  0                  0                  0
## 4  OTU_1000                  0                  0                  0
## 5  OTU_1001                  0                  1                  0
##   TARA_111_SUR_0d2_3
## 1                  0
## 2                  9
## 3                  0
## 4                  0
## 5                  2
## [1] 8642   45
##          TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1                     0                  0                  0
## OTU_10                    0                  3                  2
## OTU_100                   0                  0                  0
## OTU_1000                  0                  0                  0
## OTU_1001                  0                  1                  0
##          TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1                     0                  0
## OTU_10                    9                  2
## OTU_100                   0                  3
## OTU_1000                  0                  2
## OTU_1001                  2                 15
## [1] 8642   46
##       TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1                  0                  0                  0
## OTU_2                  0                  0                  0
## OTU_3                  0                  0                  0
## OTU_4                  0                  0                  0
## OTU_5                  0                  0                  0
##       TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1                  0                  0
## OTU_2                  0                  0
## OTU_3                  1                  0
## OTU_4                  0                  0
## OTU_5                  0                  0
## [1] 2973   46
##        TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_14                  0                  0                  0
## OTU_19                  0                  0                  0
## OTU_29                  0                  1                  0
## OTU_34                  0                  0                  0
## OTU_72                  0                  0                  0
##        TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_14                  0                  0
## OTU_19                  0                  0
## OTU_29                  0                  0
## OTU_34                  0                  0
## OTU_72                  0                  2
##        TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_14                  0                  0                  0
## OTU_19                  0                  0                  0
## OTU_29                  0                  1                  0
## OTU_34                  0                  0                  0
## OTU_72                  0                  0                  0
##        TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_14                  0                  0
## OTU_19                  0                  0
## OTU_29                  0                  0
## OTU_34                  0                  0
## OTU_72                  0                  2
## [1] 1500   43
## [1] 43
## [1] 41
## [1] 43
## [1] 43
## [1] 43
## [1] 39
## [1] 42
## [1] 41
## [1] 43
## [1] 0
## [1] 43
## [1] 41
## [1] 42
## [1] 26
## [1] 3
## [1] 0
## [1] 26
## [1] 4
## [1] 1
## [1] 14
## [1] 0
## [1] 2
## [1] 19
## [1] 37
## [1] 13
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 42
## [1] 29
## [1] 33
##                         Group.1     x
## 19             Prymnesiophyceae 34758
## 8                   Dinophyceae 23637
## 11              Mamiellophyceae 15846
## 14                Pelagophyceae  9306
## 18     Prasinophyceae_clade-VII  3575
## 5                 Chrysophyceae  2824
## 7              Dictyochophyceae  2819
## 1             Bacillariophyceae  2664
## 6                 Cryptophyceae  1922
## 2                 Bolidophyceae   509
## 20             Pyramimonadaceae   507
## 25         other_Prasinophyceae   492
## 9             Eustigmatophyceae   359
## 3          Chlorarachniophyceae   327
## 13               Pavlovophyceae   228
## 17      Prasinophyceae_clade-IX   155
## 22             Trebouxiophyceae   121
## 4                 Chlorophyceae    79
## 12         Nephroselmidophyceae    64
## 24                Xanthophyceae    21
## 21                 Rhodophyceae    16
## 23                  Ulvophyceae     6
## 15                 Phaeophyceae     5
## 10 IncertaeSedis_Archaeplastida     1
## 16          Phaeothamniophyceae     1
##                         Group.1    x
## 8                   Dinophyceae 1500
## 1             Bacillariophyceae  393
## 5                 Chrysophyceae  252
## 19             Prymnesiophyceae  179
## 11              Mamiellophyceae  117
## 6                 Cryptophyceae  107
## 22             Trebouxiophyceae   60
## 7              Dictyochophyceae   53
## 4                 Chlorophyceae   52
## 18     Prasinophyceae_clade-VII   38
## 9             Eustigmatophyceae   27
## 14                Pelagophyceae   26
## 25         other_Prasinophyceae   26
## 2                 Bolidophyceae   25
## 3          Chlorarachniophyceae   21
## 20             Pyramimonadaceae   20
## 17      Prasinophyceae_clade-IX   17
## 21                 Rhodophyceae   15
## 13               Pavlovophyceae   14
## 24                Xanthophyceae   12
## 12         Nephroselmidophyceae    7
## 23                  Ulvophyceae    5
## 15                 Phaeophyceae    4
## 10 IncertaeSedis_Archaeplastida    2
## 16          Phaeothamniophyceae    1
##                      reads_per_class OTUs_per_class
## Bacillariophyceae               2664            393
## Bolidophyceae                    509             25
## Chlorarachniophyceae             327             21
## Chlorophyceae                     79             52
## Chrysophyceae                   2824            252
##                              reads_per_class OTUs_per_class
## Prymnesiophyceae                       34758            179
## Dinophyceae                            23637           1500
## Mamiellophyceae                        15846            117
## Pelagophyceae                           9306             26
## Prasinophyceae_clade-VII                3575             38
## Chrysophyceae                           2824            252
## Dictyochophyceae                        2819             53
## Bacillariophyceae                       2664            393
## Cryptophyceae                           1922            107
## Bolidophyceae                            509             25
## Pyramimonadaceae                         507             20
## other_Prasinophyceae                     492             26
## Eustigmatophyceae                        359             27
## Chlorarachniophyceae                     327             21
## Pavlovophyceae                           228             14
## Prasinophyceae_clade-IX                  155             17
## Trebouxiophyceae                         121             60
## Chlorophyceae                             79             52
## Nephroselmidophyceae                      64              7
## Xanthophyceae                             21             12
## Rhodophyceae                              16             15
## Ulvophyceae                                6              5
## Phaeophyceae                               5              4
## IncertaeSedis_Archaeplastida               1              2
## Phaeothamniophyceae                        1              1
##                              samples_per_class
## Prymnesiophyceae                            43
## Dinophyceae                                 43
## Mamiellophyceae                             43
## Pelagophyceae                              113
## Prasinophyceae_clade-VII                   116
## Chrysophyceae                              103
## Dictyochophyceae                           112
## Bacillariophyceae                          112
## Cryptophyceae                              108
## Bolidophyceae                              111
## Pyramimonadaceae                            97
## other_Prasinophyceae                        65
## Eustigmatophyceae                           35
## Chlorarachniophyceae                        75
## Pavlovophyceae                              75
## Prasinophyceae_clade-IX                     50
## Trebouxiophyceae                            18
## Chlorophyceae                                1
## Nephroselmidophyceae                         1
## Xanthophyceae                                1
## Rhodophyceae                                 1
## Ulvophyceae                                  1
## Phaeophyceae                                 1
## IncertaeSedis_Archaeplastida                 1
## Phaeothamniophyceae                          1
##   reads_per_class    OTUs_per_class samples_per_class 
##           100.000           100.000          3086.047
##                              reads_per_class OTUs_per_class
## Prymnesiophyceae                3.467409e+01     6.02085436
## Dinophyceae                     2.357994e+01    50.45408678
## Mamiellophyceae                 1.580775e+01     3.93541877
## Pelagophyceae                   9.283534e+00     0.87453750
## Prasinophyceae_clade-VII        3.566369e+00     1.27817020
## Chrysophyceae                   2.817182e+00     8.47628658
## Dictyochophyceae                2.812194e+00     1.78271107
## Bacillariophyceae               2.657569e+00    13.21897074
## Cryptophyceae                   1.917360e+00     3.59905819
## Bolidophyceae                   5.077712e-01     0.84090145
## Pyramimonadaceae                5.057760e-01     0.67272116
## other_Prasinophyceae            4.908122e-01     0.87453750
## Eustigmatophyceae               3.581333e-01     0.90817356
## Chlorarachniophyceae            3.262106e-01     0.70635721
## Pavlovophyceae                  2.274496e-01     0.47090481
## Prasinophyceae_clade-IX         1.546258e-01     0.57181298
## Trebouxiophyceae                1.207079e-01     2.01816347
## Chlorophyceae                   7.880928e-02     1.74907501
## Nephroselmidophyceae            6.384549e-02     0.23545240
## Xanthophyceae                   2.094930e-02     0.40363269
## Rhodophyceae                    1.596137e-02     0.50454087
## Ulvophyceae                     5.985515e-03     0.16818029
## Phaeophyceae                    4.987929e-03     0.13454423
## IncertaeSedis_Archaeplastida    9.975858e-04     0.06727212
## Phaeothamniophyceae             9.975858e-04     0.03363606
##                              samples_per_class
## Prymnesiophyceae                    100.000000
## Dinophyceae                         100.000000
## Mamiellophyceae                     100.000000
## Pelagophyceae                       262.790698
## Prasinophyceae_clade-VII            269.767442
## Chrysophyceae                       239.534884
## Dictyochophyceae                    260.465116
## Bacillariophyceae                   260.465116
## Cryptophyceae                       251.162791
## Bolidophyceae                       258.139535
## Pyramimonadaceae                    225.581395
## other_Prasinophyceae                151.162791
## Eustigmatophyceae                    81.395349
## Chlorarachniophyceae                174.418605
## Pavlovophyceae                      174.418605
## Prasinophyceae_clade-IX             116.279070
## Trebouxiophyceae                     41.860465
## Chlorophyceae                         2.325581
## Nephroselmidophyceae                  2.325581
## Xanthophyceae                         2.325581
## Rhodophyceae                          2.325581
## Ulvophyceae                           2.325581
## Phaeophyceae                          2.325581
## IncertaeSedis_Archaeplastida          2.325581
## Phaeothamniophyceae                   2.325581

OTUs per class vs. samples in which they occur [PLOT DESCRIPTION]

## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_text).

Reads per class vs. OTUs per class [PLOT DESCRIPTION]

## Warning in trans$transform(limits): NaNs produced

2) 16S amplicons

2.1) Data overview

Let’s read the dataset and remove the samples containing less than 36155 reads:

## [1] 28294    59
##   OTU_no TARA_100_SUR_0d2_3 TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3
## 1  OTU_1                  0                  0                  1
## 2  OTU_2                  1                  0                  0
## 3  OTU_3                  0                  0                  0
## 4  OTU_4                  0                  2                  3
## 5  OTU_5                  0                  0                  0
##   TARA_110_SUR_0d2_3
## 1                  0
## 2                  0
## 3                  0
## 4                  1
## 5                  0
## [1] 28294    58
##       TARA_100_SUR_0d2_3 TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3
## OTU_1                  0                  0                  1
## OTU_2                  1                  0                  0
## OTU_3                  0                  0                  0
## OTU_4                  0                  2                  3
## OTU_5                  0                  0                  0
##       TARA_110_SUR_0d2_3 TARA_111_SUR_0d2_3
## OTU_1                  0                  0
## OTU_2                  0                  0
## OTU_3                  0                  0
## OTU_4                  1                  5
## OTU_5                  0                  0
## [1] 28294     3
##                                                                                                                                                                                 OTUId
## OTU_1                                                               SILVA.v123_JN018772.1.1342_Bacteria;Proteobacteria;Alphaproteobacteria;SAR11_clade;Surface_1;uncultured_bacterium
## OTU_2                                                           SILVA.v123_GU474892.2198.3716_Bacteria;Marinimicrobia__SAR406_clade_;uncultured_Marinimicrobia_bacterium_HF4000_22B16
## OTU_3                SILVA.v123_KC899221.1.1360_Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales;Pseudoalteromonadaceae;Pseudoalteromonas;uncultured_Pseudoalteromonas_sp.
## OTU_4 SILVA.v123_HQ242598.1.1472_Bacteria;Proteobacteria;Gammaproteobacteria;Thiotrichales;Thiotrichales_Incertae_Sedis;Candidatus_Endoecteinascidia;uncultured_gamma_proteobacterium
## OTU_5                                                                                   SILVA.v123_GQ337196.1.1462_Bacteria;Chloroflexi;SAR202_clade;uncultured_Chloroflexi_bacterium
##              class_A        class_B
## OTU_1 other_bacteria other_bacteria
## OTU_2 other_bacteria other_bacteria
## OTU_3 other_bacteria other_bacteria
## OTU_4 other_bacteria other_bacteria
## OTU_5 other_bacteria other_bacteria
## [1] 28294    55
##       TARA_100_SUR_0d2_3 TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3
## OTU_1                  0                  0                  1
## OTU_2                  1                  0                  0
## OTU_3                  0                  0                  0
## OTU_4                  0                  2                  3
## OTU_5                  0                  0                  0
##       TARA_110_SUR_0d2_3 TARA_111_SUR_0d2_3
## OTU_1                  0                  0
## OTU_2                  0                  0
## OTU_3                  0                  0
## OTU_4                  1                  5
## OTU_5                  0                  0
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13690   50950   83020   77920  100900  158900
##            MP1857            MP2243            MP1176            MP1421 
##             13690             17749             21609             26962 
## TARA_70_SUR_0d2_3 TARA_67_SUR_0d2_3 
##             28965             33399
## [1] 28294    49
## [1] 28294    43
## [1] 28294     0

Table dimensions and content outline:

## [1] 28294    43
##       TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1                  0                  1                  0
## OTU_2                  0                  0                  0
## OTU_3                  0                  0                  0
## OTU_4                  2                  3                  1
## OTU_5                  0                  0                  0
##       TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1                  0                  1
## OTU_2                  0                  0
## OTU_3                  0                  0
## OTU_4                  5                  2
## OTU_5                  0                  0

Minimum number of reads per station:

min(colSums(tb16_tax_occur_min36155)) 
## [1] 36155

Maximum number of reads per station:

max(colSums(tb16_tax_occur_min36155)) 
## [1] 158933

Identification of stations with higher number of reads:

amplicons_per_sample<-colSums(tb16_tax_occur_min36155)
amplicons_per_sample[which(colSums(tb16_tax_occur_min36155)>150000)]
## TARA_64_SUR_0d2_3 TARA_85_SUR_0d2_3 
##            158933            150654

Overall reads per sample:

2.2) Normalization

Let’s normalize the original dataset by randomly subsampling 36155 reads in each station:

tb16_tax_occur_min36155_t<-t(tb16_tax_occur_min36155)
tb16_tax_occur_ss36155<-rrarefy(tb16_tax_occur_min36155_t, 36155)

The normalized table shows the following dimensions and format:

## [1]    43 28294
##                    OTU_1 OTU_2 OTU_3 OTU_4 OTU_5
## TARA_102_SUR_0d2_3     0     0     0     1     0
## TARA_109_SUR_0d2_3     1     0     0     0     0
## TARA_110_SUR_0d2_3     0     0     0     0     0
## TARA_111_SUR_0d2_3     0     0     0     1     0
## TARA_112_SUR_0d2_3     0     0     0     0     0

Its content fits with the expected normalization values (36155 reads per station):

rowSums(tb16_tax_occur_ss36155)
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3 
##              36155              36155              36155 
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3 TARA_122_SUR_0d2_3 
##              36155              36155              36155 
## TARA_123_SUR_0d2_3 TARA_124_SUR_0d2_3 TARA_125_SUR_0d2_3 
##              36155              36155              36155 
## TARA_128_SUR_0d2_3 TARA_132_SUR_0d2_3 TARA_133_SUR_0d2_3 
##              36155              36155              36155 
## TARA_137_SUR_0d2_3 TARA_138_SUR_0d2_3 TARA_140_SUR_0d2_3 
##              36155              36155              36155 
## TARA_142_SUR_0d2_3 TARA_145_SUR_0d2_3 TARA_146_SUR_0d2_3 
##              36155              36155              36155 
## TARA_148_SUR_0d2_3 TARA_149_SUR_0d2_3 TARA_150_SUR_0d2_3 
##              36155              36155              36155 
## TARA_151_SUR_0d2_3 TARA_152_SUR_0d2_3  TARA_56_SUR_0d2_3 
##              36155              36155              36155 
##  TARA_57_SUR_0d2_3  TARA_62_SUR_0d2_3  TARA_64_SUR_0d2_3 
##              36155              36155              36155 
##  TARA_65_SUR_0d2_3  TARA_66_SUR_0d2_3  TARA_68_SUR_0d2_3 
##              36155              36155              36155 
##  TARA_72_SUR_0d2_3  TARA_76_SUR_0d2_3  TARA_78_SUR_0d2_3 
##              36155              36155              36155 
##  TARA_82_SUR_0d2_3  TARA_84_SUR_0d2_3  TARA_85_SUR_0d2_3 
##              36155              36155              36155 
##  TARA_96_SUR_0d2_3  TARA_98_SUR_0d2_3  TARA_99_SUR_0d2_3 
##              36155              36155              36155 
##             MP0311             MP1517             MP1672 
##              36155              36155              36155 
##             MP2821 
##              36155

Let’s check out how many OTUs don’t appear in the new table:

length(which(colSums(tb16_tax_occur_ss36155)==0)) #46
## [1] 8531

There are 8045 OTUs that don’t show any occurrence in the normalized data. Let’s remove them from the table and take a look at its final dimensions:

tb16_tax_occur_ss36155_no_cero<-tb16_tax_occur_ss36155[,-(which(colSums(tb16_tax_occur_ss36155)==0))]
tb16_tax_occur_ss36155_no_cero<-tb16_tax_occur_ss36155_no_cero[mixedorder(row.names(tb16_tax_occur_ss36155_no_cero)),]
dim(tb16_tax_occur_ss36155_no_cero) #92 913
## [1]    43 19763

Datasets summary:

dim(tb16_tax) #959 128
## [1] 28294    58
dim(tb16_tax_occur) #959 122
## [1] 28294    55
dim(tb16_tax_occur_ss36155_no_cero) #92 913
## [1]    43 19763

2.3) General community analysis

2.3.1) Richness and evenness (Shannon index)

Most of the samples take Shannon Index values around 6:

2.3.2) Richness: OTU number

Lowest number of OTUs per sample:

## [1] 2511

Maximum number of OTUs per sample:

## [1] 5077

In most of the samples, we can identify between 600 and 700 OTUs:

plot(sort(OTUs_per_sample_16S_tax_occur_ss36155), pch=19)

boxplot(OTUs_per_sample_16S_tax_occur_ss36155, pch=19)

2.3.3) Index of evenness

2.3.3.1) Pielou’s index

pielou_evenness_16S_tax_occur_ss36155<-tb16_tax_occur_ss36155_div/log(OTUs_per_sample_16S_tax_occur_ss36155)

The Pielou index (constrained between 0 and 1) takes values closer to 1 as the variation of species proportion in a sample increases. Most of the samples get values around 0.95, meaning that the numerical composition of different OTUs in a sample is highly variable - there’s no constant presence of dominant species.

The less variation in communities between the species (and the presence of a dominant specie), the lower J’ is.

plot(sort(pielou_evenness_16S_tax_occur_ss36155), pch=19)

boxplot(pielou_evenness_16S_tax_occur_ss36155, pch=19)

The OTU_38, with 890 reads, is the most abundant in the overall dataset:

head(sort(colSums(tb16_tax_occur_ss36155_no_cero), decreasing=T), n=10L)
## OTU_1754 OTU_2893  OTU_761 OTU_2428 OTU_3736 OTU_1942 OTU_2466 OTU_3271 
##    17333    13350    13260    11886    11165    10033     9996     8685 
## OTU_5398 OTU_5330 
##     8431     7718

Most of the OTUs show very few occurrences; suggesting that we will probably be able to identify a significant ammount of rare otus:

plot(log(sort(colSums(tb16_tax_occur_ss36155_no_cero), decreasing=T)), pch=19)

2.3.4) Abundance Models

2.3.4.1) Rank-Abundance or Dominance/Diversity Model (“radfit”)

The OTUs abundance distribution fits relativelly close to log-normal model.

2.3.4.2) Preston’s Lognormal Model

According to Preston’s lognormal model fit into species frequencies groups, we’re missing 1588.816 species:

tb16_tax_occur_ss36155_prestonfit<-prestonfit(colSums(tb16_tax_occur_min36155_t))
plot(tb16_tax_occur_ss36155_prestonfit, main="Pooled species")

veiledspec(tb16_tax_occur_ss36155_prestonfit)
## Extrapolated     Observed       Veiled 
##     108452.1      24691.0      83761.1

When computing Prestons’ lognormal model fit without pooling data into groups, we seem to miss 1385.371 species:

## Extrapolated     Observed       Veiled 
##     75267.64     24691.00     50576.64

2.3.5) Rarefaction curves of rarefied and non-rarefied datasets

rarec_input<-t(as.matrix(colSums(tb16_tax_occur_ss36155_no_cero)))

#tb16_rarecurve_step1000_10000<-rarecurve(rarec_input, step = 1000, 10000, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="16S amplicons diversity step=1000 & ss=10000\n(40,816 OTUs; 5,247,375 reads)\n")
#tb16_rarecurve_step1000_10000

tb16_rarecurve_step100_5700<-rarecurve(rarec_input, step = 1000, 19700, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="16S amplicons diversity step=100 & ss=5700\n(19,756 OTUs; 1,554,665 reads)\n")

#without rarefying
rarec_allOTUs_input<-t(as.matrix(colSums(t(tb16_tax_occur))))
tb16_rarecurve_allOTUs_step100_8642<-rarecurve(rarec_allOTUs_input, step = 1000, 28200, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="16S amplicons diversity non-rarefied step=1000 & ss=100000\n(28,294 OTUs; 4,285,523 reads)\n")

2.3.6) Beta diversity

2.3.6.1) Dissimilarity matrix using Bray-Curtis index:

The Bray-Curtis dissimilarity, constrained between 0 (minimum distance) and 1 (highest dissimilarity) allows us to quantify the differences between samples according to the composition and relative abundance of their OTUs. In our dataset, most of the samples pairs take dissimilarity values around 0.8, meaning that their composition is substantially different.

2.3.6.2) Hierarchical clustering

The stations seem to form clusters according to geographic localization, but there are no evident clusters separated from the general groups, apart from the one containing TARA_84, TARA_82 and TARA_85.

(To be done: assign Longhurst provinces information to each station and check if any of the central clusters is meaningful regarding to the samples’ geographical location)

2.3.6.3) Non-metric multidimensional scaling

We can identify a prominent group in the central part of the NMDS plot and a few outliers (TARA 82, 84 and 85) in the central-right edge of the plot. The stress parameter takes a value below 0.2, suggesting that the plot is acceptable.

## 
## Call:
## monoMDS(dist = tb16_tax_occur_ss36155_no_cero.bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 43 points, dissimilarity 'bray', call 'vegdist(x = tb16_tax_occur_ss36155_no_cero, method = "bray")'
## 
## Dimensions: 2 
## Stress:     7.66663e-05 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 116 iterations: Stress nearly zero (< smin)

When implementing a most robut function for computing NMDS plots, the result is quiet the same:

## Run 0 stress 9.985152e-05 
## Run 1 stress 9.987095e-05 
## ... Procrustes: rmse 6.486584e-05  max resid 0.0002957008 
## ... Similar to previous best
## Run 2 stress 9.64942e-05 
## ... New best solution
## ... Procrustes: rmse 1.626488e-05  max resid 7.824625e-05 
## ... Similar to previous best
## Run 3 stress 9.628563e-05 
## ... New best solution
## ... Procrustes: rmse 3.677841e-06  max resid 1.505732e-05 
## ... Similar to previous best
## Run 4 stress 9.206385e-05 
## ... New best solution
## ... Procrustes: rmse 1.267344e-05  max resid 3.270834e-05 
## ... Similar to previous best
## Run 5 stress 9.344504e-05 
## ... Procrustes: rmse 6.186297e-05  max resid 0.0001878703 
## ... Similar to previous best
## Run 6 stress 9.751818e-05 
## ... Procrustes: rmse 7.032632e-05  max resid 0.000206771 
## ... Similar to previous best
## Run 7 stress 9.813187e-05 
## ... Procrustes: rmse 5.572368e-05  max resid 0.0002609441 
## ... Similar to previous best
## Run 8 stress 9.341067e-05 
## ... Procrustes: rmse 3.81964e-05  max resid 0.0001407819 
## ... Similar to previous best
## Run 9 stress 7.181514e-05 
## ... New best solution
## ... Procrustes: rmse 2.218769e-05  max resid 5.003509e-05 
## ... Similar to previous best
## Run 10 stress 9.109377e-05 
## ... Procrustes: rmse 5.475929e-05  max resid 0.0002349282 
## ... Similar to previous best
## Run 11 stress 9.781534e-05 
## ... Procrustes: rmse 5.708114e-05  max resid 0.0002415838 
## ... Similar to previous best
## Run 12 stress 9.916223e-05 
## ... Procrustes: rmse 3.201431e-05  max resid 6.966896e-05 
## ... Similar to previous best
## Run 13 stress 9.722294e-05 
## ... Procrustes: rmse 2.915755e-05  max resid 6.467888e-05 
## ... Similar to previous best
## Run 14 stress 9.514618e-05 
## ... Procrustes: rmse 5.950623e-05  max resid 0.0002176754 
## ... Similar to previous best
## Run 15 stress 9.58663e-05 
## ... Procrustes: rmse 4.567358e-05  max resid 0.0001514342 
## ... Similar to previous best
## Run 16 stress 8.324376e-05 
## ... Procrustes: rmse 6.040324e-05  max resid 0.0002344572 
## ... Similar to previous best
## Run 17 stress 9.675721e-05 
## ... Procrustes: rmse 5.497252e-05  max resid 0.0002349048 
## ... Similar to previous best
## Run 18 stress 9.328282e-05 
## ... Procrustes: rmse 2.492408e-05  max resid 6.550225e-05 
## ... Similar to previous best
## Run 19 stress 9.692258e-05 
## ... Procrustes: rmse 5.376792e-05  max resid 0.0002431635 
## ... Similar to previous best
## Run 20 stress 8.975387e-05 
## ... Procrustes: rmse 4.531961e-05  max resid 0.0001704904 
## ... Similar to previous best
## *** Solution reached
## Warning in metaMDS(tb16_tax_occur_ss36155_no_cero.bray): Stress is (nearly)
## zero - you may have insufficient data
## Warning in ordiplot(x, choices = choices, type = type, display = display, :
## Species scores not available

2.4) Geographical analysis

## Warning in if (class(lats) == "SpatialPoints") lats <- coordinates(lats):
## the condition has length > 1 and only the first element will be used

Working datasets:

  1. Community matrix: tb16_tax_occur_ss36155_no_cero
dim(tb16_tax_occur_ss36155_no_cero)
## [1]    43 19763
tb16_tax_occur_ss36155_no_cero[1:5, 1:5]
##                    OTU_1 OTU_3 OTU_4 OTU_5 OTU_7
## MP0311                 0     0     0     0     0
## MP1517                 0     0     3     0     1
## MP1672                 0     0     0     0     0
## MP2821                 1     0     0     0     1
## TARA_102_SUR_0d2_3     0     0     1     0     0
  1. Community Bray-Curtis: tb16_tax_occur_ss36155_no_cero.bray
tb16_tax_occur_ss36155_no_cero.bray<-as.matrix(tb16_tax_occur_ss36155_no_cero.bray)
## [1] 43 43
  1. Stations distances in km: geo_distances_MP_16S
dim(geo_distances_MP_16S)
## [1] 43 43

Communities quickly change their composition across geographical distances:

plot(geo_distances_MP_16S, tb16_tax_occur_ss36155_no_cero.bray, pch=19, cex=0.4, xlab="Geopgraphical distances", ylab="Bray-Curtis dissimilarities")

2.4.1) Mantel correlograms

Mantel statistic is -significantlly- so low, meaning that the correlation between samples dissimilarity and geographical distances is weak.

mantel(geo_distances_MP_16S, tb16_tax_occur_ss36155_no_cero.bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = geo_distances_MP_16S, ydis = tb16_tax_occur_ss36155_no_cero.bray) 
## 
## Mantel statistic r: 0.02826 
##       Significance: 0.279 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0873 0.1167 0.1444 0.1697 
## Permutation: free
## Number of permutations: 999

Maximum distance between samples:

## [1] 19543.94

Minimum distance between samples:

## [1] 0

Correlograms:

MP_16s_ss36155_mantel_correl_by_1000km<-mantel.correlog(tb16_tax_occur_ss36155_no_cero.bray, D.geo=geo_distances_MP_16S, break.pts=seq(0,20000, by=1000))
plot(MP_16s_ss36155_mantel_correl_by_1000km)

MP_16s_ss36155_mantel_correl_by_100km<-mantel.correlog(tb16_tax_occur_ss36155_no_cero.bray, D.geo=geo_distances_MP_16S, break.pts=seq(0,20000, by=100))
plot(MP_16s_ss36155_mantel_correl_by_100km)

2.5) Abundance vs. occurence

In the following plot, we can appreciate the OTUs distribution according to their percentage of occurence and relative abundance. The red line keeps up OTUs that occur in more than 80% of the samples, the green line limits regionally rare OTUs (< 0.001%), and the blue one restricts regionally abundant OTUs (> 0.1%).

Regionally abundant OTUs (relative abundance over 0.1%):

##           mean_rabund perc_occur
## OTU_1754  0.011149026   93.02326
## OTU_2893  0.008587059   93.02326
## OTU_761   0.008529169   93.02326
## OTU_2428  0.007645377   93.02326
## OTU_3736  0.007181611   93.02326
## OTU_1942  0.006453480   93.02326
## OTU_2466  0.006429681   93.02326
## OTU_3271  0.005586413   90.69767
## OTU_5398  0.005423033   90.69767
## OTU_5330  0.004964414   93.02326
## OTU_27927 0.003777663   48.83721
## OTU_5352  0.003766728   58.13953
## OTU_3187  0.003630364  100.00000
## OTU_27901 0.003573117   81.39535
## OTU_2866  0.003430321   90.69767
## OTU_6083  0.003165955   86.04651
## OTU_8011  0.003102276   83.72093
## OTU_709   0.003061110   93.02326
## OTU_635   0.003025732   93.02326
## OTU_3172  0.002888082   88.37209
## OTU_3311  0.002843056   90.69767
## OTU_5727  0.002715698   93.02326
## OTU_3259  0.002529805   58.13953
## OTU_5447  0.002478991  100.00000
## OTU_29    0.002464840  100.00000
## OTU_5403  0.002407593   90.69767
## OTU_27900 0.002379934  100.00000
## OTU_4048  0.002347129  100.00000
## OTU_3162  0.002286023   95.34884
## OTU_1329  0.002241640   93.02326
## OTU_5633  0.002238424   95.34884
## OTU_5432  0.002228776   93.02326
## OTU_28061 0.002150302   51.16279
## OTU_5441  0.002136152   93.02326
## OTU_6164  0.002118784   93.02326
## OTU_3308  0.002082121   97.67442
## OTU_3004  0.002071829   93.02326
## OTU_2877  0.002066683   95.34884
## OTU_1676  0.002044814   88.37209
## OTU_8163  0.002039668  100.00000
## OTU_5635  0.002038381  100.00000
## OTU_5978  0.001993355  100.00000
## OTU_5961  0.001958621  100.00000
## OTU_3023  0.001958621   93.02326
## OTU_8153  0.001957335   93.02326
## OTU_1738  0.001929676   93.02326
## OTU_5772  0.001898801  100.00000
## OTU_6067  0.001894942  100.00000
## OTU_5752  0.001840911   90.69767
## OTU_5653  0.001828047   93.02326
## OTU_27912 0.001820971   46.51163
## OTU_3197  0.001815825   81.39535
## OTU_5301  0.001794599   95.34884
## OTU_3240  0.001783021   93.02326
## OTU_3810  0.001757292   93.02326
## OTU_4926  0.001751503   93.02326
## OTU_5879  0.001743784   93.02326
## OTU_4664  0.001741211  100.00000
## OTU_4366  0.001663381  100.00000
## OTU_4560  0.001658878  100.00000
## OTU_3680  0.001653732   93.02326
## OTU_9255  0.001645371   86.04651
## OTU_27934 0.001615139   95.34884
## OTU_5327  0.001590696   93.02326
## OTU_333   0.001588123  100.00000
## OTU_4991  0.001551460   95.34884
## OTU_6155  0.001512866  100.00000
## OTU_542   0.001498072   93.02326
## OTU_3731  0.001496142  100.00000
## OTU_5691  0.001470413   93.02326
## OTU_5607  0.001467197   93.02326
## OTU_23    0.001443398   93.02326
## OTU_7987  0.001436966  100.00000
## OTU_3198  0.001436966   93.02326
## OTU_5804  0.001425387   93.02326
## OTU_8367  0.001419598  100.00000
## OTU_5662  0.001409950   90.69767
## OTU_5968  0.001397086  100.00000
## OTU_3803  0.001385507  100.00000
## OTU_5741  0.001373929  100.00000
## OTU_5500  0.001371357  100.00000
## OTU_5651  0.001368140   93.02326
## OTU_5985  0.001362351  100.00000
## OTU_5667  0.001355276   93.02326
## OTU_27938 0.001353989   93.02326
## OTU_7407  0.001341125   90.69767
## OTU_5665  0.001334693   93.02326
## OTU_5925  0.001328260   86.04651
## OTU_8458  0.001327617  100.00000
## OTU_5742  0.001326974   93.02326
## OTU_4036  0.001322471   97.67442
## OTU_5564  0.001316682   93.02326
## OTU_3326  0.001315396   90.69767
## OTU_5911  0.001307034  100.00000
## OTU_6102  0.001303175   93.02326
## OTU_3347  0.001297386   97.67442
## OTU_5511  0.001293526   93.02326
## OTU_5624  0.001290953   95.34884
## OTU_5445  0.001281948  100.00000
## OTU_5942  0.001259435  100.00000
## OTU_5449  0.001256219   93.02326
## OTU_5823  0.001253003   93.02326
## OTU_28017 0.001251717   79.06977
## OTU_5491  0.001247857  100.00000
## OTU_5519  0.001240782   48.83721
## OTU_28046 0.001222771   55.81395
## OTU_5523  0.001213123  100.00000
## OTU_5288  0.001213123   90.69767
## OTU_5586  0.001207334   95.34884
## OTU_3214  0.001204761   93.02326
## OTU_5414  0.001204118  100.00000
## OTU_5268  0.001193826  100.00000
## OTU_5786  0.001188681   83.72093
## OTU_5266  0.001177102   93.02326
## OTU_6193  0.001176459  100.00000
## OTU_5493  0.001173886   95.34884
## OTU_5880  0.001172600   90.69767
## OTU_8377  0.001168741  100.00000
## OTU_7455  0.001168097  100.00000
## OTU_5976  0.001164881   93.02326
## OTU_8146  0.001164238  100.00000
## OTU_5355  0.001160379  100.00000
## OTU_10747 0.001144941   55.81395
## OTU_934   0.001139795  100.00000
## OTU_12129 0.001116639   65.11628
## OTU_9267  0.001112780   83.72093
## OTU_5982  0.001111493   93.02326
## OTU_6047  0.001099915  100.00000
## OTU_5821  0.001091553   93.02326
## OTU_5222  0.001090910   93.02326
## OTU_5211  0.001081262   95.34884
## OTU_8436  0.001075473  100.00000
## OTU_4083  0.001072257   95.34884
## OTU_5428  0.001070970  100.00000
## OTU_3789  0.001064538  100.00000
## OTU_520   0.001058749   93.02326
## OTU_5499  0.001057463   93.02326
## OTU_5817  0.001042025  100.00000
## OTU_3207  0.001030447   90.69767
## OTU_223   0.001027231   93.02326
## OTU_5815  0.001025301  100.00000
## OTU_759   0.001018226   86.04651
## OTU_2789  0.001014366  100.00000
## OTU_3173  0.001013080   83.72093
## OTU_5988  0.001011150   93.02326
## OTU_5800  0.001009864   93.02326
## OTU_2980  0.001006648  100.00000
## OTU_790   0.001005361  100.00000
## OTU_28060 0.001001502   41.86047
dim(tb16_ss36155_abundant_sorted)
## [1] 149   2

Proportion of regionally abundant OTUs (%):

## [1] 0.7539341

Cosmopolitan OTUs (relative abundance over 0.1% and occurence in more than 80% of samples):

##           mean_rabund perc_occur
## OTU_3187  0.003630364  100.00000
## OTU_5447  0.002478991  100.00000
## OTU_29    0.002464840  100.00000
## OTU_27900 0.002379934  100.00000
## OTU_4048  0.002347129  100.00000
## OTU_8163  0.002039668  100.00000
## OTU_5635  0.002038381  100.00000
## OTU_5978  0.001993355  100.00000
## OTU_5961  0.001958621  100.00000
## OTU_5772  0.001898801  100.00000
## OTU_6067  0.001894942  100.00000
## OTU_4664  0.001741211  100.00000
## OTU_4366  0.001663381  100.00000
## OTU_4560  0.001658878  100.00000
## OTU_333   0.001588123  100.00000
## OTU_6155  0.001512866  100.00000
## OTU_3731  0.001496142  100.00000
## OTU_7987  0.001436966  100.00000
## OTU_8367  0.001419598  100.00000
## OTU_5968  0.001397086  100.00000
## OTU_3803  0.001385507  100.00000
## OTU_5741  0.001373929  100.00000
## OTU_5500  0.001371357  100.00000
## OTU_5985  0.001362351  100.00000
## OTU_8458  0.001327617  100.00000
## OTU_5911  0.001307034  100.00000
## OTU_5445  0.001281948  100.00000
## OTU_5942  0.001259435  100.00000
## OTU_5491  0.001247857  100.00000
## OTU_5523  0.001213123  100.00000
## OTU_5414  0.001204118  100.00000
## OTU_5268  0.001193826  100.00000
## OTU_6193  0.001176459  100.00000
## OTU_8377  0.001168741  100.00000
## OTU_7455  0.001168097  100.00000
## OTU_8146  0.001164238  100.00000
## OTU_5355  0.001160379  100.00000
## OTU_934   0.001139795  100.00000
## OTU_6047  0.001099915  100.00000
## OTU_8436  0.001075473  100.00000
## OTU_5428  0.001070970  100.00000
## OTU_3789  0.001064538  100.00000
## OTU_5817  0.001042025  100.00000
## OTU_5815  0.001025301  100.00000
## OTU_2789  0.001014366  100.00000
## OTU_2980  0.001006648  100.00000
## OTU_790   0.001005361  100.00000
## OTU_3308  0.002082121   97.67442
## OTU_4036  0.001322471   97.67442
## OTU_3347  0.001297386   97.67442
## OTU_3162  0.002286023   95.34884
## OTU_5633  0.002238424   95.34884
## OTU_2877  0.002066683   95.34884
## OTU_5301  0.001794599   95.34884
## OTU_27934 0.001615139   95.34884
## OTU_4991  0.001551460   95.34884
## OTU_5624  0.001290953   95.34884
## OTU_5586  0.001207334   95.34884
## OTU_5493  0.001173886   95.34884
## OTU_5211  0.001081262   95.34884
## OTU_4083  0.001072257   95.34884
## OTU_1754  0.011149026   93.02326
## OTU_2893  0.008587059   93.02326
## OTU_761   0.008529169   93.02326
## OTU_2428  0.007645377   93.02326
## OTU_3736  0.007181611   93.02326
## OTU_1942  0.006453480   93.02326
## OTU_2466  0.006429681   93.02326
## OTU_5330  0.004964414   93.02326
## OTU_709   0.003061110   93.02326
## OTU_635   0.003025732   93.02326
## OTU_5727  0.002715698   93.02326
## OTU_1329  0.002241640   93.02326
## OTU_5432  0.002228776   93.02326
## OTU_5441  0.002136152   93.02326
## OTU_6164  0.002118784   93.02326
## OTU_3004  0.002071829   93.02326
## OTU_3023  0.001958621   93.02326
## OTU_8153  0.001957335   93.02326
## OTU_1738  0.001929676   93.02326
## OTU_5653  0.001828047   93.02326
## OTU_3240  0.001783021   93.02326
## OTU_3810  0.001757292   93.02326
## OTU_4926  0.001751503   93.02326
## OTU_5879  0.001743784   93.02326
## OTU_3680  0.001653732   93.02326
## OTU_5327  0.001590696   93.02326
## OTU_542   0.001498072   93.02326
## OTU_5691  0.001470413   93.02326
## OTU_5607  0.001467197   93.02326
## OTU_23    0.001443398   93.02326
## OTU_3198  0.001436966   93.02326
## OTU_5804  0.001425387   93.02326
## OTU_5651  0.001368140   93.02326
## OTU_5667  0.001355276   93.02326
## OTU_27938 0.001353989   93.02326
## OTU_5665  0.001334693   93.02326
## OTU_5742  0.001326974   93.02326
## OTU_5564  0.001316682   93.02326
## OTU_6102  0.001303175   93.02326
## OTU_5511  0.001293526   93.02326
## OTU_5449  0.001256219   93.02326
## OTU_5823  0.001253003   93.02326
## OTU_3214  0.001204761   93.02326
## OTU_5266  0.001177102   93.02326
## OTU_5976  0.001164881   93.02326
## OTU_5982  0.001111493   93.02326
## OTU_5821  0.001091553   93.02326
## OTU_5222  0.001090910   93.02326
## OTU_520   0.001058749   93.02326
## OTU_5499  0.001057463   93.02326
## OTU_223   0.001027231   93.02326
## OTU_5988  0.001011150   93.02326
## OTU_5800  0.001009864   93.02326
## OTU_3271  0.005586413   90.69767
## OTU_5398  0.005423033   90.69767
## OTU_2866  0.003430321   90.69767
## OTU_3311  0.002843056   90.69767
## OTU_5403  0.002407593   90.69767
## OTU_5752  0.001840911   90.69767
## OTU_5662  0.001409950   90.69767
## OTU_7407  0.001341125   90.69767
## OTU_3326  0.001315396   90.69767
## OTU_5288  0.001213123   90.69767
## OTU_5880  0.001172600   90.69767
## OTU_3207  0.001030447   90.69767
## OTU_3172  0.002888082   88.37209
## OTU_1676  0.002044814   88.37209
## OTU_6083  0.003165955   86.04651
## OTU_9255  0.001645371   86.04651
## OTU_5925  0.001328260   86.04651
## OTU_759   0.001018226   86.04651
## OTU_8011  0.003102276   83.72093
## OTU_5786  0.001188681   83.72093
## OTU_9267  0.001112780   83.72093
## OTU_3173  0.001013080   83.72093
## OTU_27901 0.003573117   81.39535
## OTU_3197  0.001815825   81.39535
dim(otu_tb16_ss36155_cosmop_sorted)
## [1] 138   2

Number and proportion (%) of cosmopolitan OTUs:

## [1] 138
## [1] 0.6982746

Number and proportion (%) of rare OTUs:

nrow(otu_tb16_ss36155_rabund_percoccur[otu_tb16_ss36155_rabund_percoccur$mean_rabund < 0.00001 & otu_tb16_ss36155_rabund_percoccur$mean_rabund >0,])
## [1] 13963
## [1] 70.65223